********************
* DATA REPLICATION *
********************

* Directory globals for the replication pipeline.
* In the part of the file visible here only $code is expanded by a command;
* the other globals appear in the input/output listings and are presumably
* read by the do-files that are run below (TODO confirm in each do-file).

* Folder holding the replication do-files
global code     "/homeKRTK/health_shocks/Replication_package"

* Folders under the project directory (the out_* ones appear in the file listings below)
global out      "/homeKRTK/health_shocks"
global out_AKM  "/homeKRTK/health_shocks/AKM-FEs"
global out_HS   "/homeKRTK/health_shocks/explore_health_shocks"
global out_ONET "/homeKRTK/health_shocks/ONET"
global temp     "/homeKRTK/health_shocks/coworkers"

* Folders with source data (presumably read-only inputs - verify)
global in       "/homeProspSSD/Admin3"
global in_a     "/homeProspSSD/Admin3/Archiv_v2"
global in_fekvo "/homeProspSSD/Admin3_H2_NEAK_fekvo/Archiv"
global in_med   "/homeProspSSD/Admin3_H2_NEAK_veny/"
* NOTE(review): in_ONET is a Windows-style path on a personal drive, unlike the
* Unix-style folders above - confirm it is reachable where the pipeline runs.
global in_ONET  "C:/Users/Bíró/Documents/ONET"


* Building blocks: firm quality, AKM fixed effects, in-patient spells and the
* ONET occupation measures. For each step, "outputs:" lists the files named
* under the step heading and "inputs:" the files the step is documented to read.

* creating firm quality indicators
	* outputs:
		* "$out/firmquality.dta"
do "$code/Firm_quality.do"

* creating the AKM FEs
	* outputs:
		* "$out_AKM/FEi_full_Jan2023.dta"
		* "$out_AKM/FEj_full_Jan2023.dta"
do "$code/AKM_v3.do"

* creating the AKM FEs including 2-digit occupation FEs
	* outputs:
		* "$out_AKM/FEi_o2firm.dta"
		* "$out_AKM/FEj_o2firm.dta"
		* "$out_AKM/FEo_o2firm.dta"
		* "$out_AKM/FEi_o2.dta"
		* "$out_AKM/FEo_o2.dta"
do "$code/AKM_occupation.do"

* creating information about in-patient care
	* outputs:
		* "$out_HS/bno_fekvo_anon_t.dta"
do "$code/create_spells.do"

* creating the correspondence between ONET and Hungarian "FEOR" occupation codes
	* outputs:
		* "$out_ONET/feor93.dta"
		* "$out_ONET/feor08.dta"
do "$code/onetsoc_to_isco_cws_ibs_txt_AB.do"

* creating occupation type indices from ONET data, assigned to Hungarian "FEOR" occupation codes
	* outputs:
		* "$out/onet_indices_feor93.dta"
		* "$out/onet_indices_feor08.dta"
do "$code/onet_indices.do"
	* inputs:
		* "$out_ONET/feor93.dta"
		* "$out_ONET/feor08.dta"

* Accident data: monthly accident files, the analysis population, employment
* information around accidents and the list of treated. For each step,
* "outputs:" lists the files named under the step heading and "inputs:" the
* files the step is documented to read.

* creating the monthly dataset of accidents
	* outputs:
		* "$out/bnoSext_merge_allS.dta"
do "$code/create_bnoSext_merge_allS.do"
	* inputs:
		* "$out_HS/bno_fekvo_anon_t.dta"

* creating the database of accidents with the length of absence and the type of the accident
	* outputs:
		* "$out/bnoSall_with_d.dta"
do "$code/create_bnoSall_with_d.do"
	* inputs:
		* "$out/bnoSext_merge_allS.dta"

* creating the set of people to be included in the analysis with health information
	* outputs:
		* "$out/placebo_base_bnoSext_2023Dec.dta"
do "$code/2023.12.03. - create_place_base_bnoSext.do"
	* inputs:
		* "$out_HS/bno_fekvo_anon_t.dta"

* creating the extended set of accidents
* (a different do-file from the previous step: same base name, earlier date prefix)
	* outputs:
		* "$out/bnoSext_merge.dta"
do "$code/2022.02.23. - create_place_base_bnoSext.do"
	* inputs:
		* "$out_HS/bno_fekvo_anon_t.dta"

* creating employment information around accidents
	* outputs:
		* "$out/emp_info_around_accidents_bnoSext_2022Sept.dta"
		* "$out/base_shock_data_w_postemp_info_bnoSext_2022Sept.dta"
do "$code/2022.09.08. - create_emp_info_around_XX_w_age_20_50_TLM_BM_bnoSext.do"
	* inputs:
		* "$out/placebo_base_bnoSext_2023Dec.dta"

* getting the correct d for the people having an accident
	* outputs:
		* "$out/correct_d_for_HSnoABS.dta"
do "$code/create_correct_d_for_HSnoABS.do"
	* inputs:
		* "$out/emp_info_around_accidents_bnoSext_2022Sept.dta"

* creating the list of treated with information on absence, length of absence and time of accident and return
	* outputs:
		* "$out/IVdata_HS1_accidents_bnoSext2_SL5_2022Sept26.dta"
do "$code/2022.09.26. - create_treated_for_IV_accidents_TLM_bnoSext2_SL5.do"
	* inputs:
		* "$out/emp_info_around_accidents_bnoSext_2022Sept.dta"

* Samples of potential controls, one per assumption about the random d assigned
* to the controls. For each step, "outputs:" lists the files named under the
* step heading and "inputs:" the files the step is documented to read.

* generating the sample of potential controls with a random d following the distribution of the treated
	* outputs:
		* "$out/potential_controls_updated_tlm_bnoSext2_SL5_d2_6_distrib_in_EMP_2025March19.dta"
		* "$out/HS0temp1_bnoSext_2022Sept.dta"
do "$code/2025.03.19. - create_potential_controls_w_data_cleaning_updated_TLM_bnoSext_SL5_d2_6_distrib_in_EMP.do"
	* inputs:
		* "$out/placebo_base_bnoSext_2023Dec.dta"

* generating the sample of potential controls with a random d from 2 to 12 as a robustness
	* outputs:
		* "$out/potential_controls_updated_tlm_bnoSext2_SL5_d2_12_distrib_in_EMP_2025March25.dta"
* (a stray second closing quote after the file name was removed from this call)
do "$code/2025.03.25. - create_potential_controls_w_data_cleaning_updated_TLM_bnoSext_SL5_d2_12_distrib_in_EMP.do"
	* inputs:
		* "$out/HS0temp1_bnoSext_2022Sept.dta"

* generating the sample of potential controls with a random d from 3 to 6 as a previous version
	* outputs:
		* "$out/potential_controls_updated_tlm_bnoSext2_SL5_d3_6_in_EMP_2023Nov22.dta"
do "$code/2023.11.22. - create_potential_controls_w_data_cleaning_updated_TLM_bnoSext_SL5_d3_6_in_EMP.do"
	* inputs:
		* "$out/HS0temp1_bnoSext_2022Sept.dta"

* generating the sample of potential controls with random d of 3-6 distributed as treated as a previous version
	* outputs:
		* "$out/potential_controls_updated_tlm_bnoSext2_SL5_d3_6_distrib_in_EMP_2024Jan10.dta"
do "$code/2024.01.10. - create_potential_controls_w_data_cleaning_updated_TLM_bnoSext_SL5_d3_6_distrib_in_EMP.do"
	* inputs:
		* "$out/HS0temp1_bnoSext_2022Sept.dta"

* generating the sample of potential controls with d=1
	* outputs:
		* "$out/potential_controls_updated_tlm_bnoSext2_SL5_nod_2024Apr22.dta"
* The path now starts with the $code global, as in every other call; it used to
* begin with the literal folder name code and so depended on the working directory.
* NOTE(review): this file name has no period after the date (2024.04.22 -),
* unlike the other dated do-files - confirm the name on disk.
do "$code/2024.04.22 - create_potential_controls_w_data_cleaning_updated_TLM_bnoSext_SL5_nod.do"
	* inputs:
		* "$out/HS0temp1_bnoSext_2022Sept.dta"
		
* Full analysis datasets (treated plus controls) with the variables around the
* event, one per control sample. For each step, "outputs:" lists the files named
* under the step heading and "inputs:" the files the step is documented to read.

* generating the full dataset including controls and collecting variables around the event used in the analysis
	* outputs:
		* "$out/IVdata_accidents_bnoSext2_SL5_all_controls_d_distrib_as_treated_2025March19.dta"
do "$code/2025.03.19. - create_data_for_IV_bnoSext_SL5_all_controls_d_distrib_as_treated.do"
	* inputs:
		* "$out/potential_controls_updated_tlm_bnoSext2_SL5_d2_6_distrib_in_EMP_2025March19.dta"
		* "$out/firmquality.dta"
		* "$out_AKM/FEj_full_Jan2023.dta"
		* "$out_AKM/FEi_full_Jan2023.dta"

* generating the full dataset including controls (with d 3-6) and collecting variables around the event used in the analysis
	* outputs:
		* "$out/IVdata_accidents_bnoSext2_SL5_2023Nov22.dta"
do "$code/2023.11.22. - create_data_for_IV_bnoSext_SL5_BMcorrected.do"
	* inputs:
		* "$out/potential_controls_updated_tlm_bnoSext2_SL5_d3_6_in_EMP_2023Nov22.dta"
		* "$out/IVdata_HS1_accidents_bnoSext2_SL5_2022Sept26.dta"
		* "$out/firmquality.dta"
		* "$out_AKM/FEj_full_Jan2023.dta"
		* "$out_AKM/FEi_full_Jan2023.dta"

* generating the full dataset with controls (d 3-6 distributed as treated), variables around the event used in the analysis
	* outputs:
		* "$out/IVdata_accidents_bnoSext2_SL5_ddistribrob_2024Jan10.dta"
do "$code/2024.01.10. - create_data_for_IV_bnoSext_SL5_BMcorrected_ddistribrob.do"
	* inputs:
		* "$out/potential_controls_updated_tlm_bnoSext2_SL5_d3_6_distrib_in_EMP_2024Jan10.dta"
		* "$out/IVdata_HS1_accidents_bnoSext2_SL5_2022Sept26.dta"
		* "$out/firmquality.dta"
		* "$out_AKM/FEj_full_Jan2023.dta"
		* "$out_AKM/FEi_full_Jan2023.dta"


* generating the full dataset with controls employed in m=0, with d=1
	* outputs:
		* "$out/IVdata_accidents_bnoSext2_SL5_all_controls_nod_2024Apr22.dta"
* The path now starts with the $code global, as in every other call; it used to
* begin with the literal folder name code and so depended on the working directory.
do "$code/2024.04.22. - create_data_for_IV_bnoSext_SL5_all_controls_nod.do"
	* inputs:
		* "$out/potential_controls_updated_tlm_bnoSext2_SL5_nod_2024Apr22.dta"
		* "$out/firmquality.dta"
		* "$out_AKM/FEj_full_Jan2023.dta"
		* "$out_AKM/FEi_full_Jan2023.dta"

* generating the full dataset with controls for falsification (d=1), variables around the event used in the analysis
	* outputs:
		* "$out/IVdata_accidents_bnoSext2_SL5_all_controls_d_1_2025March24.dta"
do "$code/2025.03.24. - create_data_for_IV_bnoSext_SL5_all_controls_d_1.do"
	* inputs:
		* "$out/potential_controls_updated_tlm_bnoSext2_SL5_d2_6_distrib_in_EMP_2025March19.dta"
		* "$out/firmquality.dta"
		* "$out_AKM/FEj_full_Jan2023.dta"
		* "$out_AKM/FEi_full_Jan2023.dta"

* generating the full dataset with controls for robustness (d: 2-12), variables around the event used in the analysis
	* outputs:
		* "$out/IVdata_accidents_bnoSext2_SL5_all_controls_d2_12_distrib_as_treated_2025March25.dta"
* The .do extension is written out, as in every other call: the file name
* contains periods, so a default extension should not be relied on.
do "$code/2025.03.25. - create_data_for_IV_bnoSext_SL5_all_controls_d2_12_distrib_as_treated.do"
	* inputs:
		* "$out/potential_controls_updated_tlm_bnoSext2_SL5_d2_12_distrib_in_EMP_2025March25.dta"
		* "$out/firmquality.dta"
		* "$out_AKM/FEj_full_Jan2023.dta"
		* "$out_AKM/FEi_full_Jan2023.dta"

* Data for the main regressions, the main event study and within-firm matching.
* For each step, "outputs:" lists the files named under the step heading and
* "inputs:" the files the step is documented to read.

* create the data for the main regressions, with controls having a random d of 3-6
	* outputs:
		* "$out/data_for_main_regs_20231122.dta"
do "$code/2023.11.22. - recreate_data_for_main_regressions.do"
	* inputs:
		* "$out/IVdata_accidents_bnoSext2_SL5_2023Nov22.dta"
		* "$out_AKM/FEi_full_Jan2023.dta"
		* "$out/firmquality.dta"
		* "$out_AKM/FEj_full_Jan2023.dta"

* create the data for the main regressions, with controls having a random d of 3-6 distributed as treated
	* outputs:
		* "$out/data_for_main_regs_ddistrirrob_20240110.dta"
		* NOTE(review): "ddistrirrob" here vs "ddistribrob" in the do-file name - confirm the file name on disk
do "$code/2024.01.11. - recreate_data_for_main_regressions_ddistribrob.do"	
	* inputs:
		* "$out/IVdata_accidents_bnoSext2_SL5_ddistribrob_2024Jan10.dta"
		* "$out_AKM/FEi_full_Jan2023.dta"
		* "$out/firmquality.dta"
		* "$out_AKM/FEj_full_Jan2023.dta"

* create the data for the main event study regressions
	* outputs:
		* "$out/data_for_main_regs_event_study_20231203.dta"
do "$code/2023.12.03. - recreate_data_for_main_regressions_event_study.do"
	* inputs:
		* "$out/IVdata_accidents_bnoSext2_SL5_2023Nov22.dta"
		* "$out_AKM/FEi_full_Jan2023.dta"
		* "$out/firmquality.dta"
		* "$out_AKM/FEj_full_Jan2023.dta"

* create the set of potential controls for within firm matching (with d 3-6)
	* outputs:
		* "$out/potential_controls_for_within_firm_match_broad_list_2023Dec.dta"
		* "$out/potential_controls_for_within_firm_match_2023Dec.dta"
do "$code/2023.12.03. - controls_for_within_firm_matching.do"
	* inputs:
		* "$out/IVdata_accidents_bnoSext2_SL5_2023Nov22.dta"
		* "$out/base_shock_data_w_postemp_info_bnoSext_2022Sept.dta"
		* "$out/HS0temp1_bnoSext_2022Sept.dta"
		* "$out_AKM/FEi_full_Jan2023.dta"
		* "$out/firmquality.dta"
		* "$out_AKM/FEj_full_Jan2023.dta"
		* (the two AKM paths were previously written with the AKM-FEs folder repeated after $out_AKM, which already ends in AKM-FEs)
		
* Additional regression datasets: alternative control samples, falsification,
* robustness, the noABS event study and the futures regressions. For each step,
* "outputs:" lists the files named under the step heading and "inputs:" the
* files the step is documented to read.

* create data for the regressions, including controls with d 2-6 following the distribution of d for the treated
	* outputs:
		* "$out/additional_data_for_all_regs_controls_d_distrib_as_treated_20250319.dta"
do "$code/2025.03.19. - recreate_data_for_all_controls_w_event_study_vars_d_distrib_as_treated.do"
	* inputs:
		* "$out/IVdata_accidents_bnoSext2_SL5_all_controls_d_distrib_as_treated_2025March19.dta"
		* "$out/IVdata_accidents_bnoSext2_SL5_2023Feb13.dta"
			* NOTE(review): no step visible in this file lists the 2023Feb13 dataset as an output - confirm where it is created
		* "$out_AKM/FEi_full_Jan2023.dta"
		* "$out/firmquality.dta"
		* "$out_AKM/FEj_full_Jan2023.dta"

* creating data for the falsification regressions with d=1 for all the controls
	* outputs:
		* "$out/additional_data_for_falsification_regs_controls_d_1_20250324.dta"
do "$code/2025.03.24. - recreate_data_for_falsification_controls_w_event_study_vars_d_1.do"
	* inputs:
		* "$out/IVdata_accidents_bnoSext2_SL5_all_controls_d_1_2025March24.dta"
		* "$out/IVdata_accidents_bnoSext2_SL5_2023Feb13.dta"
			* NOTE(review): no step visible in this file lists the 2023Feb13 dataset as an output - confirm where it is created
		* "$out_AKM/FEi_full_Jan2023.dta"
		* "$out/firmquality.dta"
		* "$out_AKM/FEj_full_Jan2023.dta"

* create data for the robustness regression: controls with d 2-12 following the distribution of d for the treated
	* outputs:
		* "$out/additional_data_for_all_regs_controls_d2_12_distrib_as_treated_20250325.dta"
do "$code/2025.03.25. - recreate_data_for_all_controls_w_event_study_vars_d2_12_distrib_as_treated.do"
	* inputs:
		* "$out/IVdata_accidents_bnoSext2_SL5_all_controls_d2_12_distrib_as_treated_2025March25.dta"
		* "$out_AKM/FEi_full_Jan2023.dta"
		* "$out/firmquality.dta"
		* "$out_AKM/FEj_full_Jan2023.dta"

* create data for the controls in the noABS event study
	* outputs:
		* "$out/additional_data_for_IV_and_placebo_regs_20240416.dta"
do "$code/2024.04.16. - recreate_data_for_nonABS_HS_w_event_study_vars_BM.do"
	* inputs:
		* "$out/IVdata_accidents_bnoSext2_SL5_2023Nov22.dta"
		* "$out/correct_d_for_HSnoABS.dta"
		* "$out_AKM/FEi_full_Jan2023.dta"
		* "$out/firmquality.dta"
		* "$out_AKM/FEj_full_Jan2023.dta"

* create data for futures regressions with controls having a d of 1
* NOTE(review): the heading says d of 1, but the do-file and output names say d_2_6 - confirm which is right
	* outputs:
		* "$out/data_for_futures_regs_with_d_2_6_20240508.dta"
do "$code/2024.05.08. - recreate_data_for_futures_regressions_w_event_study_vars_d_2_6_BM.do"
	* inputs:
		* "$out/IVdata_accidents_bnoSext2_SL5_2023Nov22.dta"
		* "$out/correct_d_for_HSnoABS.dta"
		* "$out/data_for_main_regs_20231122.dta"
		* "$out_AKM/FEi_full_Jan2023.dta"
		* "$out/firmquality.dta"
		* "$out_AKM/FEj_full_Jan2023.dta"

* Co-worker analysis: event-level co-worker data, the regression file with
* co-worker counts, and the list of firms to exclude. For each step, "outputs:"
* lists the files named under the step heading and "inputs:" the files the step
* is documented to read.

* create data for the co-worker analysis
	* outputs:
		* "$out/list_of_people_with_accidents.dta"
		* "$out/co_workers_with_controls_event_level_20250704_BM.dta"
do "$code/2024.10.14. - matching_for_peers_in_firms_mod_rerun_20250707_BM.do"
	* inputs:
		* "$out/data_for_main_regs_event_study_20231203.dta"
		* "$out/additional_data_for_IV_and_placebo_regs_20240416.dta"
		* "$out/IVdata_accidents_bnoSext2_SL5_all_controls_nod_2024Apr22.dta"

* create further data for the co-worker analysis including the number of co-workers
	* outputs:
		* "$out/coworker_quarter_AB_ModTreated.dta"
		* "$out/num_co_workers_in_premonth_coworker_AB_ModTreated.dta"
		* "$out/coworker_regfile_with_ncoworkers_AB_ModTreated_clean.dta"
do "$code/coworker_data_creation_for_regression.do"
	* inputs:
		* "$out/co_workers_with_controls_event_level_20250704_BM.dta"
		* "$out_AKM/FEj_full_Jan2023.dta"

* create the list of firms to be excluded from the coworker analysis
	* outputs:
		* "$out/coworkers_firms_exclude_AB.dta"
do "$code/samples_comparison_AB_BM.do"
	* inputs:
		* "$out/data_for_main_regs_event_study_20231203.dta"
		* "$out/additional_data_for_IV_and_placebo_regs_20240416.dta"
		* "$out/additional_data_for_all_regs_controls_d_distrib_as_treated_20250319.dta"
		* "$out/firmquality.dta"
		* "$out/coworker_regfile_with_ncoworkers_AB_ModTreated_clean.dta"